import pandas as pd
import numpy as np

Series In Pandas

# Build a Series from a list of values with explicit string labels.
pd.Series([1, 2, 3, 4, 5], index=list('abcde'))

# A dict converts directly: its keys become the index, its values the data.
myD = dict(e=20, f=200, g=2000, h=15)
pd.Series(data=myD)

# Arithmetic between Series aligns on index labels. 'd' and 'f' each exist
# in only one operand, so those two positions come out as NaN.
s1 = pd.Series(data=[1, 2, 3, 4, 5], index=['a', 'b', 'c', 'd', 'e'])
s2 = pd.Series(data=[1, 2, 3, 4, 5], index=['a', 'b', 'c', 'f', 'e'])
s1.add(s2)

Data-Frames in Pandas

from numpy.random import randn

# 3x4 array of random samples that will back the DataFrame.
data = randn(3, 4)
data

# Wrap the array in a DataFrame: R1..R3 label the rows, C1..C4 the columns.
df = pd.DataFrame(
    data=data,
    index=['R1', 'R2', 'R3'],
    columns=['C1', 'C2', 'C3', 'C4'],
)
df

# A single label selects one column ...
df['C1']
# ... and a list of labels selects several columns at once.
df[['C1', 'C3']]

Operations on (df) - Dataframe using pandas

# Add a new column C5 holding the element-wise product of columns C1 and C3.
df['C5'] = df['C1'].mul(df['C3'])
df

Delete any Value

# Remove column C3 from df itself: axis=1 targets columns, inplace=True mutates df.
df.drop(labels='C3', axis=1, inplace=True)

In the drop call we first pass the label of the column to remove, then axis=1 to say that the label refers to a column, and finally inplace=True so that the change is applied to df itself and is reflected in the data from then on.

axis=1 means columns and axis=0 means rows.

df  # display the frame to confirm that column C3 is gone

Sub-Selecting using Pandas

# Select a single row by its index label.
df.loc['R2']
# Select a single row by its integer position (position 1 is the second row).
df.iloc[1]
# Sub-select rows and columns together, using row labels and column labels.
df.loc[['R1', 'R2'], ['C4', 'C5']]
df.loc[['R2', 'R3'], ['C1', 'C2']]
df.loc[['R1', 'R2'], ['C1', 'C2']]

Conditional selection in PANDAS

df
# Element-wise comparison: yields a boolean frame with the same shape as df.
df > 0
# Masking with a boolean frame keeps the shape; cells where the mask is
# False are replaced by NaN.
df[df > 0]
# Masking with a boolean Series filters whole rows instead: only rows whose
# C5 value is positive are returned, so no NaN placeholders are introduced.
df[df['C5'] > 0]

Multiple conditions in PANDAS

df
# Keep only the rows where C1 is positive.
df[df['C1'] > 0]
# Combine conditions with `&`; each comparison needs its own parentheses
# because `&` binds more tightly than `>`.
df[(df['C5'] > 0) & (df['C4'] > 0)]
df
# Attach a column of new row labels to the frame.
new_index = ['row1', 'row2', 'row3']
df['new_index'] = new_index
df
# set_index and rename both return new frames; df itself is not modified
# because the results are not assigned back.
df.set_index('new_index')
# Each source column gets a distinct target name (C2 and C4 previously both
# mapped to 'col3', which produced duplicate column labels).
df.rename(columns={'C1': 'col_1', 'C2': 'col2', 'C4': 'col3', 'C5': 'col4'})

Basics of data cleanup

# Small data set with missing values (np.nan) to demonstrate cleanup.
mydata = {'me': [3, 5, np.nan], 'f1': [4, np.nan, np.nan], 'f2': [3, 5, 4]}
mydata
# Output: {'me': [3, 5, nan], 'f1': [4, nan, nan], 'f2': [3, 5, 4]}
mydataframe = pd.DataFrame(mydata)
mydataframe
# Drop every row that contains at least one NaN.
mydataframe.dropna()
# Replace every NaN in the frame with the mean of the 'me' column
# (mean() skips NaN, so this is the mean of 3 and 5).
mydataframe.fillna(value=mydataframe['me'].mean())

fillna here uses the 'me' column as the reference: it takes the mean of the values present in that column and puts that single value in place of every NaN (null) in the DataFrame.

Merging the data and operations

# Two viewer ratings for each of three movies.
newdata = {
    'Movie': ['Star-Wars', 'Star-Wars', 'Batman', 'Batman', 'Deadpool', 'Deadpool'],
    'Viwers': ['Me', 'Mf', 'MOF', 'Bobby', 'Dean', 'Sammy'],
    'Ratings': [10, 7, 6, 9, 8, 5],
}
movies = pd.DataFrame(newdata)
movies

# Aggregate per movie. numeric_only=True restricts the aggregation to the
# numeric 'Ratings' column: without it, mean() and std() raise TypeError on
# the string column 'Viwers' in pandas >= 2.0, and sum() would concatenate
# the viewer names.
movies.groupby('Movie').mean(numeric_only=True)
movies.groupby('Movie').sum(numeric_only=True)
# Groups are sorted by key, so position 2 is the last movie alphabetically.
movies.groupby('Movie').std(numeric_only=True).iloc[2]

Reading and writing files

# Load the demo CSV into a DataFrame (this rebinds df).
df = pd.read_csv(filepath_or_buffer='/data/notebook_files/Demo.csv')
df
# Export the frame to a new CSV; index=False leaves the row index out of the file.
df.to_csv(path_or_buf='new_demo.csv', index=False)
# Read the exported file back in to check the round trip.
dt = pd.read_csv(filepath_or_buffer='new_demo.csv')
dt
Created using Figma